# -*- coding: utf-8 -*-
import scrapy
from selenium import webdriver
from scrapy.linkextractors import LinkExtractor
# Chrome options passed to the Selenium driver that the spider starts.
options = webdriver.ChromeOptions()
options.add_argument('--headless')  # headless mode (no browser window)

class GetfileSpider(scrapy.Spider):
    """Emit every link found on the crawled page as a ``file_urls`` item.

    A Selenium Chrome driver is started when the spider is constructed and is
    exposed as ``self.bro``; it is quit again in :meth:`close`. The driver is
    not used by this class itself -- presumably it is consumed elsewhere
    (e.g. by a downloader middleware); confirm against the project settings.
    """

    name = 'getfile'
    allowed_domains = ['nanjing.gov.cn']
    start_urls = ['http://www.nanjing.gov.cn/xxgkn/zdly/shbz/']

    def __init__(self, *args, **kwargs):
        """Initialise the spider and start the Chrome driver.

        Positional and keyword arguments are forwarded to ``scrapy.Spider``
        so that spider arguments (``scrapy crawl getfile -a key=value``) are
        accepted instead of raising ``TypeError``.
        """
        super().__init__(*args, **kwargs)
        self.bro = webdriver.Chrome(options=options)

    def parse(self, response):
        """Yield one item per link extracted from ``response``.

        Each item has the shape ``{'file_urls': [url]}``, i.e. a single-element
        list holding the absolute URL of the link.
        """
        # NOTE(review): a default LinkExtractor skips links whose extension is
        # in scrapy.linkextractors.IGNORED_EXTENSIONS (pdf, doc, zip, ...).
        # If direct file links are wanted here, pass deny_extensions=[] --
        # confirm the intended behaviour before changing it.
        for link in LinkExtractor().extract_links(response):
            yield {'file_urls': [link.url]}

    def close(spider, reason):
        """Quit the Chrome driver when the spider is closed.

        Overrides ``scrapy.Spider.close``. ``spider`` is the spider instance
        (it takes the place of ``self``); ``reason`` is accepted but unused.
        """
        spider.bro.quit()  # shut down the browser